library(tidyverse)
library(readr)
library(dplyr)
library(ggplot2)
library(stringr)
library(cowplot)
library(RColorBrewer)
library(purrr)
library(shiny)
library(DT)

2´,3´-cyclic phosphate RNA-sequencing libraries were prepared from mouse macrophages: B6 (WT) and RNaseL KO. These cells were either mock infected or infected with WT MHV virus (from Volker), MHV mutated to inactivate ns2 (phosphodiesterase) activity, or MHV mutated to inactivate nsp15 (endoribonuclease) activity for 9 and 12 hours.

sample_info_table <- read_tsv("sample_info_exp2.txt", col_names = c("cell type", "virus type", "time"))
datatable(sample_info_table)

These libraries were processed to generate bedgraph files containing sites of 3´-cleavage and the associated dinucleotide in the MHV (+) strand RNA (mRNA) and (-) strand RNA (genomic).

Coverage plots for all cell types by type of viral infection

These plots show the normalized counts of captured 2´,3´-cyclic phosphates captured (% of total cDNA reads for each library) per nucleotide position in the the MHV (+) strand reads.

#MHV (+) strand coverage plots 

data_dir_neg = "~/Dropbox (Hesselberth Lab)/Rachel_data/EndoU_project/viral_bg/neg_exp2"
data_files_neg = list.files(data_dir_neg, full.names = T)

read_file <- function(x) {
  df <- readr::read_tsv(x, col_names = c("chrom", "start", "end", "count", "normalized_count", "dinuc"))
  df$name <- basename(x)
  df
}

neg_table <- purrr::map_df(data_files_neg, read_file) %>%
  mutate(name = str_replace(name, ".mhv.neg.dinuc.bg", "")) %>%
  separate(name, into = c('cell', 'virus', 'time'), sep = '_')

datatable(neg_table)
neg_plot <- ggplot(neg_table, aes(x = start, y = normalized_count)) +
    geom_line(aes(color = time)) +
    scale_colour_brewer(palette="Set1") + 
    theme_cowplot() + 
    facet_grid(virus ~ cell) +
    theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
    labs(x="Position", y="Normalized counts") +
    scale_x_continuous(breaks=seq(0, 30000, 5000)) + 
    ggtitle("MHV (+) strand coverage plots")
neg_plot

#MHV (-) strand coverage plots

data_dir_pos = "~/Dropbox (Hesselberth Lab)/Rachel_data/EndoU_project/viral_bg/pos_exp2"
data_files_pos = list.files(data_dir_pos, full.names = T)

pos_table <- purrr::map_df(data_files_pos, read_file) %>%
  mutate(name = str_replace(name, ".mhv.pos.dinuc.bg", "")) %>%
  separate(name, into = c('cell', 'virus', 'time'), sep = '_')

datatable(pos_table)
pos_plot <- ggplot(pos_table, aes(x = start, y = normalized_count)) +
    geom_bar(aes(fill = time), stat = "identity", position = 'dodge') +
    scale_fill_brewer(palette = 'Set1') + 
    theme_cowplot() + 
    facet_grid(virus ~ cell) + 
    theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
    labs(x="Position", y="Counts") +
    scale_x_continuous(breaks=seq(0, 30000, 5000)) +
    ggtitle("MHV (-) strand coverage plots")
pos_plot

Discussion of coverage analysis:

  1. We do not observe significant capture of MHV RNA from any of the mock infected samples
  2. In general, we see more MHV “cleavage” (aka 2´,3´-cyclic phosphate capture) at 12 hpi vs. 9 hpi with WT virus, which may reflect greater viral RNA abundance at the later time point.
  3. There is less signal in the viral samples infected with mutant nsp15, which may reflect decreased viral RNA abundance.
  4. The IFNAR KO and RNaseL KO samples are similar, but different. This may reflect direct vs. infdirect effects of dsRNA sensing pathways and effectors on MHV detection.
  5. Overall, the patterns between experiment 1 and experiment 2 are similar, except that there seems to be more cleavage in RNaseL KO and nsp15 mutant + strand RNA in experiment 2 than experiment 1. This doesn’t make a lot of sense considering that in the abscence of both RNaseL and Nsp15 there should be less cleavage of viral RNA and this was observed in experiment 1.

Identifying endonuclease specific cleavage by subtractive analysis

RNaseL substractive coverage plots of MHV (+) strand reads for all cell types by type of viral infection

The normalized counts detected in RNaseL KO cells for each type of viral infection were subtracted from the normalized counts detected in WT B6 cells. This substractive analysis will emphasize RNaseL dependent cleavage events by reporting signals that occur only in the presence of RNaseL.

#RNase L substractive analysis to remove signal that occurs in the abscence of RNAse L by cell type 

subtractive_neg <- neg_table %>% 
  dplyr::select(-count, -dinuc) %>% 
  spread(cell, normalized_count) 
subtractive_neg[is.na(subtractive_neg)] <- 0

subtractive_neg <- subtractive_neg %>% 
  mutate(B6_RNaseLdep = B6 - RNaseL) %>% 
  dplyr::select(start:end, virus:time, B6_RNaseLdep) %>% 
  gather(cell, normalized_count, B6_RNaseLdep)
                                      
#Plots of RNase L dependent sites 
subneg_plot <- ggplot(subtractive_neg, aes(x = start, y = normalized_count)) +
    geom_line(aes(color = time)) + 
    scale_color_brewer(palette = 'Set1') + 
    theme_cowplot() +
    facet_grid(virus ~ cell) + 
    theme(axis.text.x = element_text(angle = 60, hjust = 1)) + 
    labs(x="Position", y="Normalized counts") +
    ylim(0, 0.2) + 
    scale_x_continuous(breaks=seq(0, 30000, 5000)) +
    ggtitle("RNase L dependent-cleavage sites analysis")
subneg_plot

EndoU mutant (nsp15) substractive coverage analysis of MHV (+) strand for all cell types by type of viral infection

This is very similar to the analysis above, expect it will substract signal occuring in the abscence of nsp15 actviity, which will emphasize sites specific to nsp15 cleavage actviity.

# nsp15 substractive analysis to remove signal that occurs in the abscence of nsp15 activity by viral infection 

subtractive_nsp15 <- neg_table %>% 
  dplyr::select(-count, -dinuc) %>% 
  spread(virus, normalized_count)
subtractive_nsp15[is.na(subtractive_nsp15)] <- 0

subtractive_nsp15 <- subtractive_nsp15 %>% 
  mutate(MHVV_nsp15dep = MHVV - nsp15) %>% 
  mutate(ns2_nsp15dep = ns2 - nsp15) %>% 
  dplyr::select(start:end, cell:time, MHVV_nsp15dep:ns2_nsp15dep) %>% 
  gather(virus, normalized_count, MHVV_nsp15dep:ns2_nsp15dep)

#Plots of nsp15 dependent sites 
                         
subnsp15_plot <- ggplot(subtractive_nsp15, aes(x = start, y = normalized_count, fill = time)) +
    geom_line(aes(color = time)) + 
    scale_colour_brewer(palette = 'Set1') + 
    theme_cowplot() +
    facet_grid(virus ~ cell) + 
    theme(axis.text.x = element_text(angle = 60, hjust = 1)) + 
    labs(x="Position", y="Normalized counts") +
    ylim(0, 0.15) + 
    scale_x_continuous(breaks=seq(0, 30000, 5000)) +
    ggtitle("Nsp15 dependent-cleavage sites analysis")
subnsp15_plot

Discussion of substractive RNase L and Nsp15 analysis:

We have captured signal specific to RNase L and Nsp15 cleavage of MHV + strand RNA. This is evidenced by the signal that remains in nsp15-mutant RNase L WT cells and RNase L KO cells with WT nsp15.

Looking specifically at the nsp15-subtractive analysis, the nsp15 dependent sites in experiment 2 are similar to experiment 1, except that there are more pronounced differences between WT infection and ns2 infection in both WT and RNaseL KO cells.

Examining specific sites of cleavage in MHV (+) strand RNA

Overall, these sites identified as interest in experiment 1 look similar in location and signal here, except for site 18645 which is captured at a much lower frequency in experiment 2.

Site18645: Previously identified by Dave Barton as potentially a site of interest. Dinucleotide is “GC” at 18645 (GA in reference, but GC in viral RNA). There is also a site of clevage with smaller signal at 18648, which is “UU”

Site 26080 and 26087: 26080 = UC, 26087 = CU

Site25404: 25404 = UC

Site 26147 and 26148: 26147 = UC, 26147 = UU

#Site18645-Previously identified in unpublished data from Dave Barton as potentially a site of interest. Dinucleotide is "GC" at 18645 (GA in reference, but GC in viral RNA). There is also a site of clevage with smaller signal at 18648, which is "UU" 

neg_plot_18645 <- ggplot(neg_table, aes(x = start, y = normalized_count, fill = time)) +
    geom_bar(stat = "identity", position = 'dodge') +
    scale_fill_brewer(palette="Set1") + 
    theme_cowplot() + 
    facet_grid(virus ~ cell) + 
    theme(axis.text.x = element_text(angle = 60, hjust = 1)) + 
    labs(x="Position", y="Normalized counts") +
    xlim(18640, 18660) +
    ggtitle("MHV (+) strand coverage plot: sites 18645 and 18648")
neg_plot_18645

#Site 26080 and 26087- identified by visual inspection of the data, 26080 = UC, 26087 = CU

neg_plot_26080 <- ggplot(neg_table, aes(x = start, y = normalized_count, fill = time)) +
    geom_bar(stat = "identity", position = 'dodge') +
    scale_fill_brewer(palette="Set1") + 
    theme_cowplot() + 
    facet_grid(virus ~ cell) + 
    theme(axis.text.x = element_text(angle = 60, hjust = 1)) + 
    labs(x="Position", y="Normalized counts") +
    xlim(26075, 26090) +
    ggtitle("MHV (+) strand coverage plot: sites 26080 and 26087")
neg_plot_26080 

#Site25404 - identified by visual inspection of the data, 25404 = UC

neg_plot_25404 <- ggplot(neg_table, aes(x = start, y = normalized_count, fill = time)) +
    geom_bar(stat = "identity", position = 'dodge') +
    scale_fill_brewer(palette="Set1") + 
    theme_cowplot() + 
    facet_grid(virus ~ cell) + 
    theme(axis.text.x = element_text(angle = 60, hjust = 1)) + 
    labs(x="Position", y="Normalized counts") +
    xlim(25390, 25410) +
    ggtitle("MHV (+) strand coverage plot: site 25404")
neg_plot_25404

#Site 26147 and 26148 - identified by visual inspection of the data, 26147 = UC, 26147 = UU

neg_plot_26148 <- ggplot(neg_table, aes(x = start, y = normalized_count, fill = time)) +
    geom_bar(stat = "identity", position = 'dodge') +
    scale_fill_brewer(palette="Set1") + 
    theme_cowplot() + 
    facet_grid(virus ~ cell) + 
    theme(axis.text.x = element_text(angle = 60, hjust = 1)) + 
    labs(x="Position", y="Normalized counts") +
    xlim(26142, 26150) +
    ggtitle("MHV (+) strand coverage plot: sites 26147 and 26148")
neg_plot_26148

Identifying “significant” sites of nsp15 cleavage

This analysis calculates which potential cleavage sites have the greatest fold change in cleavage signal between samples infected with WT virus and samples infected with nsp15 mutant virus. Cleavage sites where signal captured during WT infection decreases the most during infection with nsp15 mutant virus may be “real” sites of MHV endoU cleavage acitivity.

Calculating “fold change” across all sites is an unbiased approach to identify potential nsp15 cleavage sites. An alternative approach, focuses on fold change at sites with the greatest signal in the libary.

Top cleavage sites by normalized count for cell type and viral infection.

This analysis identifies the top sites 50 sites by normalized count stratified by cell type and viral infection.

top <- neg_table %>%
  group_by(cell, virus) %>%
  arrange(desc(normalized_count)) %>%
  do(head(., n = 50))

datatable(top)
top_plot <- top %>%
  ggplot(aes(x = start, y = normalized_count, fill = time, width=300)) +
    geom_bar(stat = "identity", position = 'identity') +
    scale_fill_brewer(palette="Set1") + 
    theme_cowplot() + 
    facet_grid(virus ~ cell) +
    theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
    labs(x="Position", y="Normalized counts")
top_plot

The overall pattern of cleavage for the top 50 sites by normalized count is similar between experiment 1 and experiment 2. However, in experiment 2 there is more signal in nsp15 infections (both WT and RNaseL KO cells) which suggests there is more non-specific cleavage of the viral RNA in this experiment.

Fold-change for RNaseL

In this analysis, all positions in the MHV positive strand RNA with more than 20 reads were queried to identify sites with equal to or greater than a 5-fold decrease in signal comparing B6 WT cells to RNaseL KO cells (B6/RNaseL).

This analysis is independent of viral infection and just asks which sites change the most when RNaseL is absent.

change_RNaseL <- neg_table %>% 
  filter(count >= 20) %>%
  dplyr::select(-count, -dinuc) %>% 
  spread(cell, normalized_count)
change_RNaseL[is.na(change_RNaseL)] <- 0

change_RNaseL <- change_RNaseL %>% 
  mutate(B6_change = B6/RNaseL) %>% 
  gather(cell_comp, fold_change, B6_change) %>%
  filter(fold_change > 0 & fold_change != Inf) %>%
  filter(fold_change >= 5) %>%
  group_by(start) %>%
  unique() %>%
  arrange(desc(fold_change))

datatable(change_RNaseL)
change_RNaseL_graph <- change_RNaseL %>% 
  ggplot(aes(x = start, y = fold_change, fill = cell_comp, width=100)) +
  geom_bar(stat = "identity", position = 'identity') + 
  scale_fill_brewer(palette="Set1") +
  facet_grid(virus ~ cell_comp) +
  theme_cowplot() +
  labs(x="Position", y="fold_change") +
  scale_x_continuous(breaks=seq(0, 30000, 5000)) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
change_RNaseL_graph

There are 2 important observations from this data:

  1. There are more sites with greater than 5-fold decrease in signal in experiment 2 and the fold-change ranges are generally higher. This relates to the fact that I was able to find many overlapping sites comparing experiment 1 to experiment 2. However, less of these sites in experiment 2 were also captured in exp. 1

  2. In exp.2 there were no sites captured during nsp15 infection with a greater than 5-fold decrease in signal in the abscence of RNaseL. In exp.1, there were very few sites captured. This strongly suggests that abscence of nsp15 leads altered RNaseL activity.

Fold-change for Nsp15 mutant reads

In this analysis, all positions in the MHV positive strand RNA with more than 20 reads were queried to identify sites with equal to or greater than a 5-fold decrease in signal comparing cells infected with WT/ns2 mutant virus to nsp15 mutant virus (MHV WT/nsp15 and ns2/nsp15).

This analysis is independent of cells type and just asks which sites change the most when nsp15 activity is lost. Thus, these sites are not independent of RNaseL actviity.

change_nsp15 <- neg_table %>% 
  filter(count >= 20) %>%
  dplyr::select(-count, -dinuc) %>% 
  spread(virus, normalized_count)
change_nsp15[is.na(change_nsp15)] <- 0

change_nsp15 <- change_nsp15 %>% 
  mutate(MHVV_change = MHVV/nsp15) %>% 
  mutate(ns2_change = ns2/nsp15) %>% 
  gather(viral_comp, fold_change, MHVV_change:ns2_change) %>%
  filter(fold_change > 0 & fold_change != Inf) %>%
  filter(fold_change >= 5) %>%
  group_by(start) %>%
  unique() %>%
  arrange(desc(fold_change))

datatable(change_nsp15)
change_nsp15_graph <- change_nsp15 %>%
  ggplot(aes(x = start, y = fold_change, fill = viral_comp, width=100)) +
  geom_bar(stat = "identity", position = 'identity') + 
  scale_fill_brewer(palette="Set1") +
  facet_grid(cell ~ viral_comp) +
  theme_cowplot() +
  labs(x="Position", y="fold_change") +
  scale_x_continuous(breaks=seq(0, 30000, 5000)) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
change_nsp15_graph

In this analysis, the patterns of cleavage between experiment 1 and 2 are very similar. There are less sites with greater than 5-fold decrease in cleavage during infection with nsp15. However, 8 of the top 10 sites by fold-change identified in exp.1 were also captured in exp.2.

Exp.2 also furthur emphasizes that loss of RNaseL results in decreased cleavage by nsp15. Additionally, the ns2 mutant also alters cleavage of MHV RNA as compared to WT virus.

Fold-change for Nsp15 muant and RNaseL KO reads

Same analysis as above focused in RNaseL KO cells.

change_nsp15 <- neg_table %>% 
  filter(count >= 20) %>%
  dplyr::select(-count, -dinuc) %>% 
  spread(virus, normalized_count)
change_nsp15[is.na(change_nsp15)] <- 0

change_nsp15RNaseL <- change_nsp15 %>% 
  mutate(MHVV_change = MHVV/nsp15) %>% 
  mutate(ns2_change = ns2/nsp15) %>% 
  gather(viral_comp, fold_change, MHVV_change:ns2_change) %>%
  filter(fold_change > 0 & fold_change != Inf) %>%
  filter(fold_change >= 5) %>%
  filter(cell == "RNaseL") %>%
  group_by(start) %>%
  unique() %>%
  arrange(desc(fold_change))

datatable(change_nsp15RNaseL)
change_nsp15RNaseL_graph <- change_nsp15RNaseL %>%
  ggplot(aes(x = start, y = fold_change, fill = viral_comp, width=100)) +
  geom_bar(stat = "identity", position = 'identity') + 
  scale_fill_brewer(palette="Set1") +
  facet_grid(cell ~ viral_comp) +
  theme_cowplot() +
  labs(x="Position", y="fold_change") +
  scale_x_continuous(breaks=seq(0, 30000, 5000)) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
change_nsp15RNaseL_graph

This analysis reveals some of the biggest differences between exp.1 and exp.2; in experiment 1 there were 35 sites captured in RNaseL KO cells with a greater than 5-fold decrease in signal with nsp15 infection. However, only 5 sites were detected in exp.2 and none of these were captured in experiment 1. This may be related to the higher level of cleavage signal observed in RNaseL KO cells infected with nsp15 in experiment 2 relative to experiment 1.

Top RNaseL-dependent sites

In this analysis, the sites with the greatest normalized counts that are RNaseL dependent are identified. This relies on an inital subtractive analysis to emphasize signal that occurs when RNaseL is present. The top 50 sites by cell type that are RNaseL dependent are then identified. This data is filtered by sites with 20 or more reads.

top_RNaseL <- neg_table %>% 
  filter(count >= 20) %>%
  dplyr::select(-count, -dinuc) %>% 
  spread(cell, normalized_count) 
top_RNaseL[is.na(top_RNaseL)] <- 0

top_RNaseL <- top_RNaseL %>% 
  mutate(B6_RNaseLdep = B6 - RNaseL) %>% 
  dplyr::select(start:end, virus:time, B6_RNaseLdep) %>% 
  gather(cell, normalized_count, B6_RNaseLdep) %>%
  group_by(cell) %>%
  arrange(desc(normalized_count)) %>%
  do(head(., n = 50))
  
datatable(top_RNaseL)
top_RNaseL_graph <- 
  ggplot(top_RNaseL, aes(x = start, y = normalized_count, fill = cell, width=100)) +
  geom_bar(stat = "identity", position = 'identity') + 
  scale_fill_brewer(palette="Set1") +
  facet_grid(virus ~ cell) +
  theme_cowplot() +
  labs(x="Position", y="normalized_count") +
  scale_x_continuous(breaks=seq(0, 30000, 5000)) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
top_RNaseL_graph

Fold change for top RNaseL-dependent sites

Using the top RNaseL-depenendent sites in a fold-change analaysis. This data is also filtered for sites with at least 20 or more reads and with a 5-fold or greater decrease in signal.

top_change_RNaseL <- inner_join(neg_table, top_RNaseL, by = "start") %>%
  filter(count >= 20) %>%
  dplyr::select(start, end.x, normalized_count.x, cell.x, virus.x, time.x) %>%
  unique() %>%
  spread(cell.x, normalized_count.x)
top_change_RNaseL[is.na(top_change_RNaseL)] <- 0

top_change_RNaseL <- top_change_RNaseL %>% 
  mutate(B6_change = B6/RNaseL) %>% 
  gather(cell, fold_change, B6_change) %>%
  filter(fold_change > 0 & fold_change != Inf) %>%
  filter(fold_change >= 5) %>%
  group_by(start) %>%
  unique() %>%
  arrange(desc(fold_change))

datatable(top_change_RNaseL)
top_change_RNaseL_graph <- top_change_RNaseL %>%
  ggplot(aes(x = start, y = fold_change, fill = cell, width=100)) +
  geom_bar(stat = "identity", position = 'identity') + 
  scale_fill_brewer(palette="Set1") +
  facet_grid(virus.x ~ cell) +
  theme_cowplot() +
  labs(x="Position", y="fold_change") +
  scale_x_continuous(breaks=seq(0, 30000, 5000)) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
top_change_RNaseL_graph

There is good overlap between experiment 1 and experiment 2 with regard to RNaseL cleavage of MHV RNA by the above analysis.

Top nsp15-dependent sites

In this analysis, the sites with the greatest normalized counts that are nsp15-dependent are identified. This relies on an inital subtractive analysis to emphasize signal that occurs when nsp15 is present. The top 50 sites by cell type that are nsp15-dependent are then identified.

top_nsp15 <- neg_table %>% 
  filter(count >= 20) %>%
  dplyr::select(-count, -dinuc) %>% 
  spread(virus, normalized_count)
top_nsp15[is.na(top_nsp15)] <- 0

top_nsp15 <- top_nsp15 %>% 
  mutate(MHVV_nsp15dep = MHVV - nsp15) %>% 
  mutate(ns2_nsp15dep = ns2 - nsp15) %>% 
  dplyr::select(start:end, cell:time, MHVV_nsp15dep:ns2_nsp15dep) %>% 
  gather(virus, normalized_count, MHVV_nsp15dep:ns2_nsp15dep) %>%
  group_by(virus) %>%
  arrange(desc(normalized_count)) %>%
  do(head(., n = 50))

datatable(top_nsp15)
top_nsp15_graph <- 
  ggplot(top_nsp15, aes(x = start, y = normalized_count, fill = virus, width = 100)) +
  geom_bar(stat = "identity", position = 'identity') + 
  scale_fill_brewer(palette="Set1") +
  facet_grid(cell ~ virus) +
  theme_cowplot() +
  labs(x="Position", y="normalized_count") +
  scale_x_continuous(breaks=seq(0, 30000, 5000)) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
top_nsp15_graph

Top nsp15-dependent sites fold change

Using the top nsp15-depenendent sites in a fold-change analaysis. This data is also filtered for sites with at least 20 or more reads and with a 5-fold or greater decrease in signal.

top_change_nsp15 <- inner_join(neg_table, top_nsp15, by = "start") %>%
  filter(count >= 20) %>%
  dplyr::select(start, end.x, normalized_count.x, cell.x, virus.x, time.x) %>%
  unique() %>%
  spread(virus.x, normalized_count.x)
top_change_nsp15[is.na(top_change_nsp15)] <- 0

top_change_nsp15 <- top_change_nsp15 %>% 
  mutate(MHVV_change = MHVV/nsp15) %>% 
  mutate(ns2_change = ns2/nsp15) %>% 
  mutate(mock_change = mock/nsp15) %>%
  gather(virus, fold_change, MHVV_change:mock_change) %>%
  filter(fold_change > 0 & fold_change != Inf) %>%
  filter(fold_change >= 5) %>%
  group_by(start) %>%
  unique() %>%
  arrange(desc(fold_change))

datatable(top_change_nsp15)
top_change_nsp15_graph <- 
  ggplot(top_change_nsp15, aes(x = start, y = fold_change, fill = virus, width = 100)) +
  geom_bar(stat = "identity", position = 'identity') + 
  scale_fill_brewer(palette="Set1") +
  facet_grid(cell.x ~ virus) +
  theme_cowplot() +
  labs(x="Position", y="fold_change") +
  scale_x_continuous(breaks=seq(0, 30000, 5000)) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
top_change_nsp15_graph

Top nsp15-dependent and RNaseL-independent sites fold change

Same analysis as above focused on RNaseL KO cells.

top_change_nsp15RNaseL <- top_change_nsp15 %>% 
  mutate(MHVV_change = MHVV/nsp15) %>% 
  mutate(ns2_change = ns2/nsp15) %>% 
  mutate(mock_change = mock/nsp15) %>%
  gather(virus, fold_change, MHVV_change:mock_change) %>%
  filter(fold_change > 0 & fold_change != Inf) %>%
  filter(fold_change >= 5) %>%
  group_by(start) %>%
  filter(cell.x == "RNaseL") %>%
  unique() %>%
  arrange(desc(fold_change))


datatable(top_change_nsp15RNaseL)
top_change_nsp15RNaseL_graph <- 
  ggplot(top_change_nsp15RNaseL, aes(x = start, y = fold_change, fill = virus, width = 100)) +
  geom_bar(stat = "identity", position = 'identity') + 
  scale_fill_brewer(palette="Set1") +
  facet_grid(cell.x ~ virus) +
  theme_cowplot() +
  labs(x="Position", y="fold_change") +
  scale_x_continuous(breaks=seq(0, 30000, 5000)) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
top_change_nsp15RNaseL_graph

There are overlapping positions of “significant” nsp15-dependent cleavage by normalized count and fold change analysis between the 2 experiemnts in B6 WT cells. This does not hold true for these analyses in RNaseL KO cells where there are almost no significant cleavages in experiment 2 under infection with WT cells. There are only 5 sites with a greater than 5-fold change in the abscence of nsp15 that are nsp15 dependent and RNaseL independent in exp.2, while experiment 1 had 35. None of the sights from experiment 2 were identified in experiment 1.

Calculating dinucelotide frequencies for all cell types by infection status

Performing a dinucleotide frequency analysis will help us to confirm RNaseL signatures as a positive control and investigate a potential sequence preference for nsp15 cleavage.

# Calculating freuqency of each dinucleotide for the + strand MHV reads 

dinuc <- c("AA","GA", "CA", "UA","AC", "CC", "GC", "UC", "AG", "CG", "GG", "UG", "AU", "CU", "GU", "UU")
df_dinuc <- data_frame(dinuc)

frequencies_neg <- function(x) {
  df <- readr::read_tsv(x, col_names = c("chrom", "start", "end", "count",      "normalized_count", "dinuc"))
  df <- aggregate(cbind(count) ~ dinuc, data = df, sum)
  df <- left_join(df_dinuc, df) %>% 
    replace_na(list(count = 0)) %>% 
    mutate(freq = count/sum(count)) 
  df$name <- basename(x)
  df %>% mutate(name = str_replace(name, ".mhv.neg.dinuc.bg", "")) %>%
  separate(name, into = c('cell', 'virus', 'time'))
}

neg_freq <- purrr::map_df(data_files_neg, frequencies_neg)

Plotting dinucleotide frequencies for all cell types by type of viral infection

These plots show the freuqency of cleavege at 3´-dinucleotides for the MHV (+) sense RNA from highest to lowest frequency. I removed the mock infected sample data from these graphs.

# Dincleotide freuqnecy plots 

neg_freq_plot <- filter(neg_freq, virus != "mock") %>%
  ggplot(aes(x = dinuc , y = freq)) + 
  scale_fill_brewer(palette = 'Set1') + 
  scale_x_discrete(limits=c("AA","GA", "CA", "UA","AC", "CC", "GC", "UC", "AG", "CG", "GG",   "UG", "AU", "CU", "GU", "UU")) + 
  geom_bar(aes(fill = time), stat = "identity", position = 'dodge') + 
  theme_cowplot() + 
  facet_grid(virus ~ cell) + 
  theme(axis.text.x = element_text(angle = 60, hjust = 1, size=8)) +
  ggtitle("MHV (+) strand dinucleotide frequency plots")
neg_freq_plot

RNase L substractive dinucleotide analysis of for all cell types by type of viral infection

This is similar to the previous coverage substractive analysis. These plots display dinucleotde cleavage frequencies dependent on RNase L by removing the frequencies detected in the abscence of RNase L.

# Subtractive RNaseL dinucleotide analysis

subtractive_neg <- neg_freq %>% 
  dplyr::select(dinuc, freq:time) %>% 
  spread(cell, freq) %>% 
  mutate(B6_RNaseLdep = B6 - RNaseL) %>% 
  dplyr::select(dinuc:time, B6_RNaseLdep) %>% 
  gather(cell, freq, B6_RNaseLdep)

# Plots 

subneg_freq_plot <- subtractive_neg %>% 
  filter(virus != "mock") %>%
  ggplot(aes(x = dinuc , y = freq)) + 
  scale_fill_brewer(palette = 'Set1') + 
  scale_x_discrete(limits=c("AA","GA", "CA", "UA","AC", "CC", "GC", "UC", "AG", "CG", "GG",     "UG", "AU", "CU", "GU", "UU")) + 
  geom_bar(aes(fill = time), stat = "identity", position = 'dodge') + 
  theme_cowplot() +
  facet_grid(virus ~ cell) + 
  theme(axis.text.x = element_text(angle = 60, hjust = 1, size = 8)) +
  ggtitle("MHV (+) strand RNase L subtractive dinucleotide frequency analysis")
subneg_freq_plot

Nsp15 substractive dinucleotide analysis of for all cell types by type of viral infection

These plots indentify some dinucleotide cleavage events that might be dependent on nsp15 activity and independent of RNase L activity.

#Subtractive nsp15 dinucleotide analysis

subdinuc_neg_nsp15 <- neg_freq %>% 
  dplyr::select(dinuc, freq:time) %>% 
  spread(virus, freq) %>%
  mutate(MHVV_nsp15dep = MHVV - nsp15) %>% 
  mutate(ns2_nsp15dep = ns2 - nsp15) %>% 
  dplyr::select(dinuc:time, MHVV_nsp15dep:ns2_nsp15dep) %>% 
  gather(virus, freq, MHVV_nsp15dep:ns2_nsp15dep)

# Plots

subnegdinuc_freq_plot <- subdinuc_neg_nsp15 %>% 
  filter(virus != "mock") %>% 
  ggplot(aes(x = dinuc , y = freq)) + 
  scale_fill_brewer(palette = 'Set1') + 
  scale_x_discrete(limits=c("AA","GA", "CA", "UA","AC", "CC", "GC", "UC", "AG", "CG", "GG",   "UG", "AU", "CU", "GU", "UU")) + 
  geom_bar(aes(fill = time), stat = "identity", position = 'dodge') + 
  theme_cowplot() +
  facet_grid(virus ~ cell) + 
  theme(axis.text.x = element_text(angle = 60, hjust = 1, size = 8)) +
  #scale_y_continuous(limits = c(0.00, 0.10)) +
  ggtitle("MHV (+) strand nsp15 subtractive dinucleotide frequency analysis")
subnegdinuc_freq_plot

Discussion of dinucleotide analysis

Experiment 2 is mostly consistent with the dinuclotide analysis from experiment 1. The most prominent cleavages are still consistent with known RNaseL dinucleotide preferences, UA and UU, especially predomiannt in the ns2 mutant virus infection.

In the general analysis, there is a overall preference for U >>> C >> A > G. The RNaseL specific analysis identifies UA as the predominantly cleaved dincleotide, which is also consistent with experiment 1. In the nsp15 focused analysis, in the B6 WT cells with WT virus there is a preference for pyrimidine cleavage. But, this pattern is disrupted in the ns2 mutant infection and in the RNaseL KO cells.